cap log close
clear
clear matrix
set more off

/***********************************************************************************
***	This program generates results describing trends of rotation group bias in   ***
*** the Current Population Survey as presented in                                ***
*** Krueger, Mas, and Niu (Review of Economics and Statistics, forthcoming).     ***
************************************************************************************/

// Change to the directory that contains datasets
local datadir "C:"
// Change to the directory that contains output files
local outdir "C:"

// Log file
log using `outdir'/RGB_CPS_codes_descriptive_REStat.log, replace

// Set seed for bootstrapped SE
set seed 1234567890

/***********************************************************************************
	Summarize labor force information for each year and append datasets together
	(Number of people in the labor force, employed, or unemployed)
************************************************************************************/

local var year monis age unemp emp lf weight

use `var' using `datadir'/base2014ax.dta if age >= 16 & lf < .
drop age
save `outdir'/data_by_year.dta, replace

foreach y of numlist 1976/2013 {
	use `var' using `datadir'/base`y'ax.dta if age >= 16 & lf < .
	drop age
	append using `outdir'/data_by_year.dta
	save `outdir'/data_by_year.dta, replace
}

sort year
compress
save `outdir'/data_by_year.dta,replace

// Calculate summary statistics
collapse (sum) lf emp unemp [pw=weight], by(year monis)
* Calculate unemployment rate
gen unemprt = unemp/lf*100
sort year monis
compress
save `outdir'/RGB_CPS_annual_summary.dta, replace


/***********************************************************************************
	Figure 1: Annual unemployment rate and difference from the BLS estimates
************************************************************************************/

// Read in annual unemployment rate from BLS
clear
input year unemprt_BLS
1976 7.7
1977 7.1
1978 6.1
1979 5.8
1980 7.1
1981 7.6
1982 9.7
1983 9.6
1984 7.5
1985 7.2
1986 7.0
1987 6.2
1988 5.5
1989 5.3
1990 5.6
1991 6.8
1992 7.5
1993 6.9
1994 6.1
1995 5.6
1996 5.4
1997 5.0
1998 4.5
1999 4.2
2000 4.0
2001 4.7
2002 5.8
2003 6.0
2004 5.5
2005 5.1
2006 4.6
2007 4.6
2008 5.8
2009 9.3
2010 9.6
2011 9.0
2012 8.1
2013 7.4
2014 6.2
end
save `outdir'/unemprt_BLS.dta, replace

// Read in unemployment rate by year and MIS
use year monis unemprt using `outdir'/RGB_CPS_annual_summary.dta, clear

// Reshape the data from (year*MIS) to (year-by-MIS)
reshape wide unemprt, i(year) j(monis)

// Merge unemployment rate by year and MIS with the BLS annual estimates
merge 1:1 year using `outdir'/unemprt_BLS.dta, nogen
erase `outdir'/unemprt_BLS.dta

// Calculate the difference from the BLS estimates by MIS
foreach i of numlist 1/8 {
	gen unemprtdif`i' = unemprt`i'-unemprt_BLS
}

// Figure 1(a): annual unemployment rate by MIS and year
#delimit ;
line unemprt1-unemprt8 year,
	legend(order(1 "MIS1" 2 "MIS2" 3 "MIS3" 4 "MIS4" 5 "MIS5" 6 "MIS6" 7 "MIS7" 8 "MIS8") row(2))
	xtitle("Year")
	ytitle("Unemployment rate (Percentage Point)")
	xlabel(1975(10)2015)
	saving(`outdir'/unemprt_by_year_MIS.gph, replace);
#delimit cr
graph export `outdir'/unemprt_by_year_MIS.pdf, replace

// Figure 1(b): difference from BLS unemployment rate by MIS
#delimit ;
line unemprtdif1-unemprtdif8 year,
	legend(order(1 "MIS1" 2 "MIS2" 3 "MIS3" 4 "MIS4" 5 "MIS5" 6 "MIS6" 7 "MIS7" 8 "MIS8") row(2))
	xtitle("Year")
	ytitle("Unemployment rate - BLS (Percentage Point)")
	xlabel(1975(10)2015)
	saving(`outdir'/unemprt_by_year_MIS_minusBLS.gph, replace);
#delimit cr
graph export `outdir'/unemprt_by_year_MIS_minusBLS.pdf, replace


/***********************************************************************************
	Calculate multiplicative AND additive indices of rotation group bias
************************************************************************************/

use `outdir'/RGB_CPS_annual_summary.dta, clear

// Calculate average across 8 rotation groups in each year
bysort year: egen double unemprt_avg = mean(unemprt)
// Calculate multiplicative and additive indices
gen unemprt_ind = unemprt/unemprt_avg*100
gen unemprt_dif = unemprt-unemprt_avg

keep year monis unemprt_avg unemprt_ind unemprt_dif

// Calculate slope of multiplicative and additive indices
gen slope_ind = .	// Slope of multiplicative indices
gen slope_dif = .	// Slope of additive indices
sum year
local minyr = r(min)
local maxyr = r(max)
foreach y of numlist `minyr'/`maxyr' {
	quietly count if year == `y'
	if (r(N) > 0) {
		quietly reg unemprt_ind monis if year == `y'
		replace slope_ind = _b[monis] if year == `y'
		quietly reg unemprt_dif monis if year == `y'
		replace slope_dif = _b[monis] if year == `y'
	}
}

// Reshape the data from (year*MIS*measure) to (year*measure-by-MIS)
reshape wide unemprt_ind unemprt_dif, i(year unemprt_avg slope_ind slope_dif) j(monis)

// Organize output dataset
sort year
order year unemprt_ind* unemprt_dif* unemprt_avg slope_ind slope_dif

foreach i of numlist 1/8 {
	label var unemprt_ind`i' "Multiplicative indices, MIS`i'"
	label var unemprt_dif`i' "Additive indices, MIS`i'"
}
label var unemprt_avg "Average over MIS"
label var slope_ind "Slope of multiplicative indices"
label var slope_dif "Slope of additive indices"

compress
save `outdir'/RGB_CPS_annual_indices.dta, replace


/***********************************************************************************
	Figure 2: multiplicative indices of RGB over time (fitted time trend).
************************************************************************************/

* Fit a linear trend for bias of unemployment rate
use `outdir'/RGB_CPS_annual_indices.dta, clear
gen time = year-1994
gen post93 = (year >= 1994)
gen post93_time = post93 * time
// Trend break only
reg slope_ind time post93
predict yhat, xb
// with interaction term
reg slope_ind time post93 post93_time
predict yhat_int, xb

#delimit ;
twoway (line slope_ind year, lp(solid)) 
       (line yhat      year, lp(solid))
       (line yhat_int  year, lp(dash)),
       xtitle("Year")
       xlabel(1975(5)2015)
       ytitle("Magnitude of Bias")
       legend(order(1 "Mutiplicative indices" 2 "Fitted line (w/ level shift in 1994)" 3 "Fitted line (w/ level shift and trend break in 1994)") row(3))
	   saving(`outdir'/RGB_CPS_annual_bias.gph, replace);
#delimit cr
graph export `outdir'/RGB_CPS_annual_bias.pdf, replace


/***********************************************************************************
	Figure B1 and B2: multiplicative AND additive indices of rotation group bias
************************************************************************************/
use `outdir'/RGB_CPS_annual_indices.dta, clear

// Reshape from (year-by-MIS) to (year*MIS)
reshape long unemprt_ind unemprt_dif, i(year unemprt_avg slope_ind slope_dif) j(monis)

replace slope_ind = round(slope_ind, 0.01)
replace slope_dif = round(slope_dif, 0.01)

// Plot RGB with multiplicative indices by year
#delimit ;
twoway (scatter unemprt_ind monis, connect(l) mc(edkblue)) 
	   (scatter unemprt_ind monis if monis == 7, mlabel(slope_ind) mlabs(large) mlabp(12) mlabg(*8) mc(edkblue)),
	by(year,iscale(*1.4) rows(6) legend(off) compact note("") title("Rotation Group Bias in the Current Population Survey, 1976-2014",size(med)))
	xtitle("Month in Sample")
	ytitle("Multiplicative Bias Factor")
	ylabel(,labs(medium))
	xlabel(,labs(medium))
	saving(`outdir'/RGB_CPS_annual_indices_mul.gph, replace);
#delimit cr
graph export `outdir'/RGB_CPS_annual_indices_mul.pdf, replace

// Plot RGB with additive indices by year
#delimit ;
twoway (scatter unemprt_dif monis, connect(l) mc(edkblue)) 
	   (scatter unemprt_dif monis if monis == 7, mlabel(slope_dif) mlabs(large) mlabp(12) mlabg(*8) mc(edkblue)),
	by(year,iscale(*1.4) rows(6) legend(off) compact note("") title("Rotation Group Bias in the Current Population Survey, 1976-2014",size(med)))
	xtitle("Month in Sample")
	ytitle("Additive Bias Factor")
	ylabel(,labs(small))
	xlabel(,labs(medium))
	saving(`outdir'/RGB_CPS_annual_indices_add.gph, replace);
#delimit cr
graph export `outdir'/RGB_CPS_annual_indices_add.pdf, replace


/***********************************************************************************
	Table 2: multiplicative indices of rotation group bias for two periods:
		1976-1993 and 1994-2014
************************************************************************************/

use `outdir'/RGB_CPS_annual_summary.dta, clear

// Present the numbers of unemployed and employed workers in thousands
replace unemp = unemp/1000
replace emp = emp/1000

foreach var of varlist unemp emp unemprt {
	// Calculate average over 8 rotation groups in each year
	bysort year: egen double `var'_avg = mean(`var')
	// Calculate multiplicative indices
	gen `var'_ind = `var'/`var'_avg*100
}

keep year monis *_avg *_ind

// Reshape the data from "year*MIS-by-measure" to "year*MIS*measure"
rename unemp_avg avg1
rename emp_avg avg2
rename unemprt_avg avg3

rename unemp_ind ind1
rename emp_ind ind2
rename unemprt_ind ind3

reshape long avg ind, i(year monis) j(measure)

// Calculate slope of multiplicative indices
gen slope = .
sum year
local minyr = r(min)
local maxyr = r(max)
foreach i of numlist `minyr'/`maxyr' {
	foreach j of numlist 1/3 {
		quietly count if year == `i' & measure == `j'
		if (r(N) > 0) {
			quietly reg ind monis if year == `i' & measure == `j'
			replace slope = _b[monis] if year == `i' & measure == `j'
		}
	}
}

// Reshape the data from "year*MIS*measure" to "year*measure-by-MIS"
reshape wide ind, i(year measure avg slope) j(monis)

// Calculate average for two periods
gen period = 1 if year < 1994
replace period = 2 if year >= 1994
collapse ind1-ind8 avg slope, by(measure period)

sort measure period
order measure period ind1-ind8 avg slope

// Organize output data
#delimit ;
label define measure
	1 "# Unemp"
	2 "# Emp"
	3 "Unempr rate";
#delimit cr
label values measure measure

label define period 1 "1976-1993" 2 "1994-2014"
label values period period

foreach i of numlist 1/8 {
	label var ind`i' "Multiplicative indices, MIS`i'"
}
label var avg "Average over MIS"
label var slope "Slope of multiplicative indices"

compress
save `outdir'/RGB_CPS_indices.dta, replace


*~~~~~~~~~ Bootstrapped SE for Table 2 ~~~~~~~~~
clear

// Define parameters
local iteration = 500
local minyr = 1976
local maxyr = 2014

// Create an empty dataset to store RGB of labor force measures from bootstrapped sample
set obs 1 
gen year = .
gen iteration = .
gen measure = .
gen slope = .
save `outdir'/RGB_CPS_indices_bootstrap.dta, replace
* Keep a count of obs in the data
local obscnt = 1

// Suspend log file for this portion
log close

// Draw samples from the two periods
foreach y of numlist `minyr'/`maxyr' {	
	foreach i of numlist 1/`iteration' {
	
		di "Year `y' and iteration `i'
	
		use year monis emp unemp lf weight using `outdir'/data_by_year.dta if year == `y', clear
		gen wt = .
		bsample, weight(wt)
		replace wt = wt*weight
		drop weight

		collapse (sum) emp unemp lf [pw=wt], by(monis)
		replace unemp = unemp/1000
		replace emp = emp/1000
		gen unemprt = unemp/lf*100
		
		rename unemp   value1
		rename emp     value2
		rename unemprt value3
		
		save `outdir'/temp.dta, replace
		
		foreach j of numlist 1/3 {
		
			use `outdir'/temp.dta, clear
		
			// RGB multiplicative indices
			egen double avg = mean(value`j')
			gen ind`j' = value`j'/avg*100	
			quietly reg ind`j' monis
			
			// Store values
			use `outdir'/RGB_CPS_indices_bootstrap.dta, clear
			replace year = `y' in `obscnt'
			replace iteration = `i' in `obscnt'
			replace measure = `j' in `obscnt'
			replace slope = _b[monis] in `obscnt'
			
			// Increment the number observations
			local obscnt = `obscnt' + 1	
			set obs `obscnt'	// Create a new blank row
		
			save `outdir'/RGB_CPS_indices_bootstrap.dta, replace
		}
		
		erase `outdir'/temp.dta
		
	}
}

// Re-start log file
log using `outdir'/RGB_CPS_codes_descriptive_REStat.log

use `outdir'/RGB_CPS_indices_bootstrap.dta, clear
outsheet using `outdir'/RGB_CPS_indices_bootstrap.csv, comma replace

// Calculate average over pre and post-1994 period
use `outdir'/RGB_CPS_indices_bootstrap.dta if year < ., clear
gen period = 1 if year < 1994
replace period = 2 if year >= 1994
collapse slope, by(measure iteration period)

// Calculate bootstrapped standard error (or SD of sampled statistics) by year
collapse slope (sd) slope_se = slope, by(measure period)
rename slope slope_mean
keep measure period slope_mean slope_se

// Merge bootstrapped SE with the average indices
merge 1:1 measure period using `outdir'/RGB_CPS_indices.dta, nogen
order slope_mean slope_se, last
save `outdir'/RGB_CPS_indices.dta, replace

outsheet using `outdir'/RGB_CPS_indices.csv, comma replace

// Delete raw data
erase `outdir'/data_by_year.dta


log close
set more on
